Package installation
aheads <- as.numeric(params$weeks)
# if(params$outcome=="cases") {
# #This is for the cases:
# url_case <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_cases.rds"
# download.file(url_case, "eval_cases.RDS") # download to disk
# scores <- readRDS(paste0(here(), "/eval_cases.RDS"))
# scores <- subset(scores, forecaster %in% params$forecasters)
# }
# if(params$outcome=="deaths"){
url_deaths <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_deaths.rds"
download.file(url_deaths, "eval_deaths.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_deaths.RDS"))
scores <- subset(scores, forecaster %in% params$forecasters)
# }
primary_forecaster <- params$primary_forecaster
our_pred_dates <-
scores %>%
filter(forecaster == "CMU-TimeSeries")
our_pred_dates <- unique(our_pred_dates$forecast_date)
n_dates <- length(our_pred_dates)
# n_dates - 4 is often the most recent date with ground truth for 4 weeks ahead
# dates spaced out 2 weeks ahead to see different behaviors
forecast_dates <- our_pred_dates[n_dates- 2 *5:2]
scores$forecast_date <-
if_else(scores$forecaster %in% c("Karlen-pypm", "CU-select"), scores$forecast_date + 1, scores$forecast_date)
scores <- subset(scores, forecast_date %in% forecast_dates & ahead <= aheads)
results <- intersect_averagers(scores, c("forecaster"), c("forecast_date", "geo_value")) %>%
select(c("ahead", "geo_value", "forecaster","forecast_date", "data_source", "signal","target_end_date","incidence_period","actual","wis","ae","cov_80"))
results %>%
group_by(forecast_date) %>%
summarise(n_distinct(geo_value))
Retrieving prediction data of the selected forecasters
results %>%
download_this(
output_name = "results",
output_extension = ".csv",
button_label = "Download Predictions Evaluation",
button_type = "success",
has_icon = TRUE,
csv2 = FALSE,
icon = "fa fa-save"
)
NOTE: Results are based on the following numbers of common locations
By Weeks Ahead
weeks.label = c("1 week ahead", "2 weeks ahead", "3 weeks ahead", "4 weeks ahead")
names(weeks.label) = c(1, 2, 3, 4)
subtitle = sprintf("Forecasts made over %s to %s",
format(min(forecast_dates), "%B %d, %Y"),
format(max(forecast_dates), "%B %d, %Y"))
plot_ae <-
plot_canonical(results,
x = "ahead",
y = "ae",
aggr = mean) +
labs(title = subtitle, x= "Weeks Ahead", y = "Mean AE",color='Forecasters') +
# geom_line(aes(linetype=forecaster, color=forecaster)) +
geom_point(aes(color=forecaster, text=sprintf("Weeks Ahead: %s<br>Average Error: %s <br>Forecaster: %s",
ahead,
round(ae, digits=2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_y_log10()
if (params$colorblind_palette) {
plot_ae <- plot_ae +
scale_color_viridis_d()
}
ggplotly(plot_ae, tooltip="text", width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
By Forecast Dates
plot_wis <-
plot_canonical(results,
x = "forecast_date",
y = "wis",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle,
x = "Forecast Dates",
y = "Mean WIS",
color = "Forecasters") +
geom_point(aes(text=sprintf("Forecast Date: %s<br>Mean WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
scale_y_log10()
if (params$colorblind_palette) {
plot_wis <- plot_wis +
scale_color_viridis_d()
}
ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
By Forecast Dates
plot_cov80 <-
plot_canonical(results,
x = "forecast_date",
y = "cov_80",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80", color='Forecasters') +
geom_point(aes(text = sprintf("Forecast Date: %s<br>Coverage: %s <br>Forecaster: %s",
forecast_date,
round(cov_80, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = .8), )
if (params$colorblind_palette) {
plot_cov80 <- plot_cov80 +
scale_color_viridis_d()
}
ggplotly(plot_cov80, tooltip="text", height=800, width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s.
geom_mean <- function(x) prod(x)^(1/length(x))
#geom_mean <- exp(mean(log((x+1)/(y+1)))) #still need to figure this out
mean_wis <-
plot_canonical(results %>%
filter(wis > 0),
x = "ahead",
y = "wis",
aggr = geom_mean,
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
labs(title = subtitle,
x = "Weeks Ahead",
y = "Geometric mean relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Weeks Ahead: %s<br>WIS: %s <br>Forecaster: %s",
ahead,
round(wis, digits = 2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis <- mean_wis +
scale_color_viridis_d()
}
ggplotly(mean_wis, tooltip="text", width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean_wis_forecast_date <-
plot_canonical(results %>%
filter(wis > 0),
x = "forecast_date",
y = "wis",
aggr = geom_mean,
facet_rows = "ahead",
grp_vars = c("forecaster", "ahead"),
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
theme(legend.position = "bottom") +
labs(title = subtitle,
x = "Forecast date",
y = "Geometric mean relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Forecast Date: %s<br>WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis_forecast_date <- mean_wis_forecast_date +
scale_color_viridis_d()
}
ggplotly(mean_wis_forecast_date, tooltip = "text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean score over forecast dates and aheads Note that the results are scaled by population.
library(sf)
maps <- results %>%
group_by(geo_value, forecaster) %>%
summarise(across(wis:cov_80, mean)) %>%
left_join(animalia::state_population, by = "geo_value") %>%
mutate(across(wis:cov_80, ~ .x / population * 1e5)) %>%
pivot_longer(wis:cov_80, names_to = "score") %>%
group_by(score) %>%
mutate(time_value = Sys.Date(),
r = max(value)) %>%
group_by(forecaster, .add = TRUE) %>%
group_split()
# for county prediction, set geo_type = "county"
maps <- purrr::map(maps,
~as.covidcast_signal(
.x, signal = .x$score[1],
data_source = .x$forecaster[1],
geo_type = "state"))
maps <- purrr::map(maps,
~plot(.x,
choro_col = scales::viridis_pal()(3),
range = c(0,.x$r[1])))
nfcasts <- length(unique(results$forecaster))
# original code
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)
cowplot::plot_grid(plotlist = maps[(nfcasts+1):(nfcasts*2)], ncol = 3)
cowplot::plot_grid(plotlist = maps[((nfcasts*2)+1):length(maps)], ncol = 3)
For county predictions, you might want to change the fig.height and fig.width chunk options here (in other notebooks we use fig.height = 120 and fig.width = 30).
options(timeout=200)
url4 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/predictions_cards.rds"
download.file(url4, "predictions.RDS") # download to disk
state_predictions <- readRDS(paste0(here(), "/predictions.RDS"))
# Check how many aheads in the state_predictions data
# Check the forecast_end_date column
# for county prediction, set geo_type = "county"
state.label = c(state.name, "Washington D.C.", "Porto Rico")
names(state.label) = c(tolower(state.abb), "dc", "pr")
# trajectory plots for selected forecaster
pd <- evalcast:::setup_plot_trajectory(
state_predictions %>% filter(forecaster=="CMU-TimeSeries" & forecast_date %in% forecast_dates),
intervals = 0.8,
geo_type = "state",
start_day = min(forecast_dates) - 60)
g <- ggplot(pd$truth_df, mapping = aes(x = target_end_date))
# build the fan
g <- g + geom_ribbon(
data = pd$quantiles_df %>%
mutate(upper = round(upper, 2),
lower = round(lower, 2)),
mapping = aes(ymin = lower,
ymax = upper,
fill = forecaster,
group = interaction(forecaster, forecast_date)),
alpha = .1) +
scale_fill_viridis_d(begin=.15, end=.85)
# line layer
g <- g +
#geom_line(aes(y = .data$value.y), color = "#3182BD") + # corrected
geom_line(aes(y = value), size = .5) + # reported
geom_line(data = pd$points_df,
mapping = aes(y = value,
color = forecaster,
group = interaction(forecaster, forecast_date)),
size = .5) +
geom_point(aes(y = value), size = 1) + # reported gets dots
geom_point(data = pd$points_df %>%
mutate(value = round(value, 2)),
mapping = aes(y = value, color = forecaster),
size = 1) +
scale_color_viridis_d(begin=.15, end=.85)
states <- g +
facet_wrap(~geo_value,
scales = "free_y",
ncol = 3,
labeller = labeller(geo_value = state.label)) +
labs(x = "", y = "") +
theme_bw() +
theme(legend.position = "none", strip.background = element_rect(fill = "white"))
ggplotly(states, height=5000, width= 1000)